



* Get a list of all sein-seinunit-qtimes in UMETRICS-PHF.
* This is necessary because some sein-seinunit-qtimes in the UMETRICS-PHF are not in the seinunit ECF. Why is this?
* We need to impute seinunit NAICS codes for these.

* Build the distinct list of sein-seinunit-qtime keys present in the PHF extract
* and stash it in a tempfile for the merge against the seinunit ECF.
cd $datadir/intermediate_files
use phf_interleave_b_2018_usiris_2018q4_LONG, clear
* The unit identifier is stored as seinunit1 in this file (it is renamed just
* below). Name it explicitly rather than relying on Stata resolving "seinunit"
* as an abbreviation of seinunit1, which breaks under -set varabbrev off- or
* as soon as another seinunit* variable exists.
keep sein seinunit1 qtime
duplicates drop
rename seinunit1 seinunit
tempfile sein_seinunit_qtime
save `sein_seinunit_qtime', replace

* Load only the key and NAICS columns of the seinunit ECF, then attach the PHF
* key list. Rows found only in the ECF (master-only) are not kept; rows found
* only in the PHF (using-only) stay, with missing NAICS to be imputed later.
cd $datadir/raw_pulls/lehd_usiris
use sein seinunit qtime naics2017fnl naics2012fnl naics2007fnl ///
    using ecf_interleave_seinunit_t13_2018_usiris_2018q4, clear
merge 1:1 sein seinunit qtime using `sein_seinunit_qtime', keep(match using)

* Impute missing NAICS codes
*
* For each NAICS vintage (2007, 2012, 2017) create naics<year>fnl_imp:
*   1. the reported code, when it is non-missing;
*   2. otherwise the forward- or backward-filled code when the two fills agree,
*      or when only one of them exists;
*   3. otherwise the missing run lies between two different codes. Quarters
*      before the middle of that run take the forward fill, quarters after it
*      take the backward fill, and a quarter exactly at the middle is assigned
*      by a seeded random draw.
* Units with no code in any quarter keep an empty imputed value.
keep sein seinunit qtime naics2017fnl naics2012fnl naics2007fnl
sort sein seinunit qtime

* stripolate is a user-written command (SSC); stop early with a clear message
* instead of failing inside the loop.
capture which stripolate
if _rc {
    display as error "stripolate is not installed; run: ssc install stripolate"
    exit 199
}

* Tie-break draw. runiform() is assigned row by row, so draw it once while the
* data are sorted on the unique key sein-seinunit-qtime; otherwise the draw a
* row receives depends on whatever order earlier commands left behind. Using
* one draw for every vintage keeps the tie-break consistent across vintages.
set seed 1234
gen double rand = runiform()

local years 2007 2012 2017
foreach year in `years' {

    * Forward/backward string fills within each sein-seinunit along qtime.
    by sein seinunit, sort: stripolate naics`year'fnl qtime, gen(naics`year'fnl_forward) forward
    by sein seinunit, sort: stripolate naics`year'fnl qtime, gen(naics`year'fnl_backward) backward

    gen naics`year'fnl_imp = ""
    replace naics`year'fnl_imp = naics`year'fnl if !missing(naics`year'fnl)
    replace naics`year'fnl_imp = naics`year'fnl_forward if naics`year'fnl_forward == naics`year'fnl_backward & missing(naics`year'fnl_imp)
    replace naics`year'fnl_imp = naics`year'fnl_forward if missing(naics`year'fnl_backward) & missing(naics`year'fnl_imp)
    replace naics`year'fnl_imp = naics`year'fnl_backward if missing(naics`year'fnl_forward) & missing(naics`year'fnl_imp)

    * Rows still unresolved at this point form the "gaps".
    gen byte gap = missing(naics`year'fnl_imp)

    * Number each run of consecutive unresolved quarters within a unit, then
    * take the median qtime of that run only. The median over all of the
    * unit's quarters would split at the unit's lifetime midpoint, not at the
    * middle of the gap.
    by sein seinunit (qtime), sort: gen long gap_spell = sum(gap == 1 & (_n == 1 | gap[_n-1] == 0))
    by sein seinunit gap_spell, sort: egen median_gap_qtime = median(qtime) if gap == 1

    replace naics`year'fnl_imp = naics`year'fnl_forward if qtime < median_gap_qtime & gap == 1 & missing(naics`year'fnl_imp)
    replace naics`year'fnl_imp = naics`year'fnl_backward if qtime > median_gap_qtime & gap == 1 & missing(naics`year'fnl_imp)

    * A quarter sitting exactly on the gap's median is assigned at random.
    replace naics`year'fnl_imp = naics`year'fnl_forward if qtime == median_gap_qtime & gap == 1 & rand > 0.5 & missing(naics`year'fnl_imp)
    replace naics`year'fnl_imp = naics`year'fnl_backward if qtime == median_gap_qtime & gap == 1 & rand <= 0.5 & missing(naics`year'fnl_imp)

    drop naics`year'fnl_forward naics`year'fnl_backward gap gap_spell median_gap_qtime

}

drop rand

* Shrink storage types, put the panel in key order, and write the result to
* the intermediate-files directory.
compress
sort sein seinunit qtime

cd $datadir/intermediate_files
save sein_seinunit_qtime_naics_imp, replace
